cd "C:\Users\jedwab\Desktop\Replication Files for The Employment Profile of Cities around the World WD\Other Files\UNSNA"

* This do-file creates "gdpsh_unsna" and "gdpsh_unsnaama"

* UN - only GVA
* Gross value added (GVA) is defined as the value of output less the value of intermediate consumption. Thus, Gross Domestic Product (GDP) of any nation represents the sum total of gross value added (GVA) in all the sectors of that economy during the said year after adjusting for taxes and subsidies. .
* Close to GDP

***************************
***** UN-SNA - ISIC 3 *****
***************************

* Table 2.3 Output, gross value added, and fixed assets by industries at current prices (ISIC Rev. 3)Go toSearch glossaries
* Source: National Accounts Official Country Data | United Nations Statistics Division
* https://data.un.org/Data.aspx?q=gross+value+added&d=SNA&f=group_code%3a203
* Based on ISIC 3 *
* 606,406 records

* Has for all the sectors
* We focus on MFG and FIRE
clear
import delimited "UNdata_ISIC3.csv", clear 
tab subitem
gen type = "fire" if subitem == "Financial intermediation; real estate, renting and business activities (J+K)"
replace type = "mfg" if subitem == "Manufacturing (D)"
replace type = "total" if subitem == "Total Economy"
drop if type == ""
drop if year == .
ren country country
keep country year type value snasystem fiscalyeartype series
reshape wide value, i(country year series snasystem fiscalyeartype) j(type) string
sort country year series snasystem fiscalyeartype
gen gdpsh_mfg = valuemfg/valuetotal*100 
gen gdpsh_fire = valuefire/valuetotal*100
sum gdpsh_*, d
* ok
drop value*
sort country year series snasystem fiscalyeartype
tab series snasystem
* Different series and systems
* We take the average per country year
replace country = "Sudan" if country == "Sudan (up to 2011)"
replace country = "Yemen" if country == "Yemen Arab Republic [former]"
collapse (mean) gdpsh_*, by(country year)
sort country year
*bysort country: keep if _n == 1
gen country_wup = country
drop country
*replace country_wup == "Afghanistan" if country_wup == ""
*replace country_wup == "Cambodia" if country_wup == ""
*replace country_wup == "China" if country_wup == ""
replace country_wup = "China, Hong Kong SAR" if country_wup == "China, Hong Kong Special Administrative Region"
replace country_wup = "China, Macao SAR" if country_wup == "China, Macao Special Administrative Region"
*replace country_wup == "China, Taiwan Province of China" if country_wup == ""
*replace country_wup == "Comoros" if country_wup == ""
replace country_wup = "Côte d'Ivoire" if country_wup == "CÃ´te d'Ivoire"
*replace country_wup == "Democratic Republic of the Congo" if country_wup == ""
*replace country_wup == "Eritrea" if country_wup == ""
*replace country_wup == "Ethiopia" if country_wup == ""
*replace country_wup == "Guyana" if country_wup == ""
*replace country_wup == "Haiti" if country_wup == ""
*replace country_wup == "Indonesia" if country_wup == ""
*replace country_wup == "Lao People's Democratic Republic" if country_wup == ""
*replace country_wup == "Liberia" if country_wup == ""
*replace country_wup == "Madagascar" if country_wup == ""
*replace country_wup == "Malawi" if country_wup == ""
*replace country_wup == "Nepal" if country_wup == ""
*replace country_wup == "Somalia" if country_wup == ""
replace country_wup = "Swaziland" if country_wup == "Eswatini"
*replace country_wup == "Uganda" if country_wup == ""
replace country_wup = "United Republic of Tanzania" if country_wup == "Tanzania - Mainland"
*replace country_wup == "Viet Nam" if country_wup == ""
*sort country_wup
sort country_wup year
save temp, replace
* 1946-2019
clear
import excel "list_years.xlsx", sheet("Sheet1") firstrow clear
save listyears, replace
use temp, clear
bysort country_wup: keep if _n == 1
keep country_wup
cross using listyears
tab year
sort country_wup year
merge country_wup year using temp
tab _m
drop _m
sort country_wup year
tab year
* We use MA
sort country year
foreach V in gdpsh_mfg gdpsh_fire {
foreach N in 1 2 3 4 5 {
sort country year
bysort country: gen lag`N'`V' = `V'[_n-`N']
sort country year
bysort country: gen lead`N'`V' = `V'[_n+`N']
}
}
ren gdpsh_mfg curgdpsh_mfg 
ren gdpsh_fire curgdpsh_fire
order country year lag5*mfg lag4*mfg lag3*mfg lag2*mfg lag1*mfg cur*mfg lead1*mfg lead2*mfg lead3*mfg lead4*mfg lead5*mfg lag5*fire lag4*fire lag3*fire lag2*fire lag1*fire cur*fire lead1*fire lead2*fire lead3*fire lead4*fire lead5*fire
foreach N in 1 2 3 4 5 {
egen gdpsh_mfg_ma`N'_unisic3 = rmean(lag`N'gdpsh_mfg-lead`N'gdpsh_mfg)
}
foreach N in 1 2 3 4 5 {
egen gdpsh_fire_ma`N'_unisic3 = rmean(lag`N'gdpsh_fire-lead`N'gdpsh_fire)
}
keep country year *_ma*_unisic3
codebook *fire_ma*
* as expected, ma5 has fewer missings
gen lastyr = substr(string(year),4,1)
tab lastyr
keep if lastyr == "0" | lastyr == "5"
drop last 
tab year
sort country year
save GDP_mfgfire_undata, replace

*** List 116 countries ***

*use "list.dta", clear
*keep if year == 2020
*keep country_wup year
*sort country_wup
*save list116, replace

*use list116, clear
*sort country_wup
*merge country_wup using GDP_mfgfire_undata 
*tab _m
*tab country_wup if _m == 1
*tab country_wup if _m == 2
*drop _m
* ok, other countries are missing 

*** We merge with the main data ***

use "list.dta", clear
sort country_wup year
merge country_wup year using GDP_mfgfire_undata
tab _m
tab country_wup if _m == 2 
* ok, not in the sample
drop if _m == 2
drop _m
drop *_ma1_* *_ma3_* *_ma4_*
sort country_wup year
tab year
codebook gdpsh_*_ma5_unisic3 if year == 2020
* missing for 71-72
codebook gdpsh_*_ma5_unisic3 if year == 2015
* missing for 49-50
codebook gdpsh_*_ma5_unisic3 if year == 2010
* missing for 40-41
sort country_wup year
foreach N in 2 5 {
foreach X in mfg fire {
bysort country_wup: gen lag1gdpsh_`X'_ma`N'_unisic3 = gdpsh_`X'_ma`N'_unisic3[_n-1]
bysort country_wup: gen lag2gdpsh_`X'_ma`N'_unisic3 = gdpsh_`X'_ma`N'_unisic3[_n-2]
}
}
* missing for 41-40
sort country_wup year
save gdpsh_mfg_fire_unisic3, replace

***************************
***** UN-SNA - ISIC 4 *****
***************************

*https://data.un.org/Data.aspx?q=gross+value+added&d=SNA&f=group_code%3a206
*Table 2.6 Output, gross value added and fixed assets by industries at current prices (ISIC Rev. 4)Go toSearch glossaries
*Source: National Accounts Official Country Data | United Nations Statistics Division

* Has for all the sectors
* We focus on MFG and FIRE
clear
import delimited "UNdata_ISIC4.csv", clear 
tab subitem
gen type = "fi" if subitem == "Financial and insurance activities (K)"
replace type = "re" if subitem == "Real estate activities (L)"
replace type = "ic" if subitem == "Information and communication (J)"
replace type = "pro" if subitem == "Professional, scientific, technical, administrative and support service activities (M+N)"
replace type = "mfg" if subitem == "Manufacturing (C)"
replace type = "total" if subitem == "Total Economy"
drop if type == ""
drop if year == .
ren country country
keep country year type value snasystem fiscalyeartype series
reshape wide value, i(country year series snasystem fiscalyeartype) j(type) string
sort country year series snasystem fiscalyeartype
foreach X in fi ic mfg pro re {
gen gdpsh_`X' = value`X'/valuetotal*100 
sum gdpsh_`X', d
}
* ok
drop value*
sort country year series snasystem fiscalyeartype
tab series snasystem
* Different series and systems
* We take the average per country year
replace country = "Sudan" if country == "Sudan (up to 2011)"
replace country = "Yemen" if country == "Yemen Arab Republic [former]"
collapse (mean) gdpsh_*, by(country year)
sort country year
*bysort country: keep if _n == 1
gen country_wup = country
drop country
replace country_wup = "China, Hong Kong SAR" if country_wup == "China, Hong Kong Special Administrative Region"
replace country_wup = "China, Macao SAR" if country_wup == "China, Macao Special Administrative Region"
replace country_wup = "Côte d'Ivoire" if country_wup == "CÃ´te d'Ivoire"
replace country_wup = "Swaziland" if country_wup == "Eswatini"
replace country_wup = "United Republic of Tanzania" if country_wup == "Tanzania - Mainland"
sort country_wup year
save temp, replace
tab year
* 1950-2019
clear
import excel "list_years.xlsx", sheet("Sheet1") firstrow clear
save listyears, replace
use temp, clear
bysort country_wup: keep if _n == 1
keep country_wup
cross using listyears
tab year
sort country_wup year
merge country_wup year using temp
tab _m
drop _m
sort country_wup year
tab year
* We use MA
sort country year
foreach V in gdpsh_mfg gdpsh_ic gdpsh_fi gdpsh_re gdpsh_pro {
foreach N in 1 2 3 4 5 {
sort country year
bysort country: gen lag`N'`V' = `V'[_n-`N']
sort country year
bysort country: gen lead`N'`V' = `V'[_n+`N']
}
}
foreach X in fi ic mfg pro re {
ren gdpsh_`X' curgdpsh_`X'
}
foreach X in fi ic mfg pro re {
order country year lag5*`X' lag4*`X' lag3*`X' lag2*`X' lag1*`X' cur*`X' lead1*`X' lead2*`X' lead3*`X' lead4*`X' lead5*`X' 
}
foreach X in fi ic mfg pro re {
foreach N in 1 2 3 4 5 {
egen gdpsh_`X'_ma`N'_unisic4 = rmean(lag`N'gdpsh_`X'-lead`N'gdpsh_`X')
}
}
keep country year *_ma*_unisic4
*codebook *fire_ma*
* as expected, ma5 has fewer missings
gen lastyr = substr(string(year),4,1)
tab lastyr
keep if lastyr == "0" | lastyr == "5"
drop last 
tab year
sort country year
save GDP_mfgfire_undata, replace

*** We merge with the main data ***

use "list.dta", clear
sort country_wup year
merge country_wup year using GDP_mfgfire_undata
tab _m
tab country_wup if _m == 2 
* ok, not in the sample
drop if _m == 2
drop _m
drop *_ma1_* *_ma3_* *_ma4_*
sort country_wup year
tab year
foreach N in 2 5 {
egen gdpsh_fire_ma`N'_unisic4 = rsum(gdpsh_fi_ma`N'_unisic4 gdpsh_re_ma`N'_unisic4 gdpsh_pro_ma`N'_unisic4) if gdpsh_fi_ma`N'_unisic4 !=. | gdpsh_re_ma`N'_unisic4 !=. | gdpsh_pro_ma`N'_unisic4 !=. 
}
codebook gdpsh_mfg_ma5_unisic4 gdpsh_fire_ma5_unisic4 if year == 2020
* missing for 77-79
codebook gdpsh_mfg_ma5_unisic4 gdpsh_fire_ma5_unisic4 if year == 2015
* missing for 75-77
codebook gdpsh_mfg_ma5_unisic4 gdpsh_fire_ma5_unisic4 if year == 2010
* missing for 75-77
sort country_wup year
foreach N in 2 5 {
foreach X in mfg fire ic {
bysort country_wup: gen lag1gdpsh_`X'_ma`N'_unisic4 = gdpsh_`X'_ma`N'_unisic4[_n-1]
bysort country_wup: gen lag2gdpsh_`X'_ma`N'_unisic4 = gdpsh_`X'_ma`N'_unisic4[_n-2]
}
}
drop *_fi_* *_re_* *_pro_*
* missing for 77-75
sort country_wup year
save gdpsh_mfg_fire_unisic4, replace

*** We combine ***

use gdpsh_mfg_fire_unisic3, clear
sort country_wup year
merge country_wup year using gdpsh_mfg_fire_unisic4
tab _m
drop _m
corr gdpsh_mfg_ma5_unisic*
* 0.91
corr gdpsh_fire_ma5_unisic*
* 0.77
*twoway (scatter gdpsh_fire_ma5_unisic4 gdpsh_fire_ma5_unisic3 if year == 2020, mlabel(countrylabel))
*twoway (scatter gdpsh_fire_ma5_unisic4 gdpsh_fire_ma5_unisic3 if year == 2015, mlabel(countrylabel))
*twoway (scatter gdpsh_fire_ma5_unisic4 gdpsh_fire_ma5_unisic3 if year == 2010, mlabel(countrylabel))
*
*foreach N in 2 5 {
*foreach X in 2020 2015 2010 {
*corr gdpsh_fire_ma`N'_unisic4 gdpsh_fire_ma`N'_unisic3 if year == `X'
*}
*}
* 0.69 0.80 0.74
* ok - use this
* use the mean
keep year LAC country* gdpsh* lag*gdpsh*
foreach N in 2 5 {
egen gdpsh_mfg_ma`N'_un = rmean(gdpsh_mfg_ma`N'_unisic*)
egen lag1gdpsh_mfg_ma`N'_un = rmean(lag1gdpsh_mfg_ma`N'_unisic*)
egen lag2gdpsh_mfg_ma`N'_un = rmean(lag2gdpsh_mfg_ma`N'_unisic*)
egen gdpsh_fire_ma`N'_un = rmean(gdpsh_fire_ma`N'_unisic*)
egen lag1gdpsh_fire_ma`N'_un = rmean(lag1gdpsh_fire_ma`N'_unisic*)
egen lag2gdpsh_fire_ma`N'_un = rmean(lag2gdpsh_fire_ma`N'_unisic*)
egen gdpsh_ic_ma`N'_un = rmean(gdpsh_ic_ma`N'_unisic*)
egen lag1gdpsh_ic_ma`N'_un = rmean(lag1gdpsh_ic_ma`N'_unisic*)
egen lag2gdpsh_ic_ma`N'_un = rmean(lag2gdpsh_ic_ma`N'_unisic*)
}
keep year LAC country* *gdpsh*un
foreach N in 2 5 {
foreach X in ic mfg fire {
gen `X'2020_ma`N'_un = gdpsh_`X'_ma`N'_un
replace `X'2020_ma`N'_un = lag1gdpsh_`X'_ma`N'_un if `X'2020_ma`N'_un == . 
}
}
codebook *2020_ma5_un if year == 2020 
* 38 for fire
* 78 for ic 
codebook *2020_ma5_un if year == 2020 & LAC == 1
* 3 for LAC
codebook gdpsh_fire_ma5_un if year >= 1960
codebook gdpsh_fire_ma5_un if year == 1960
codebook gdpsh_ic_ma5_un if year >= 1960
codebook gdpsh_ic_ma5_un if year == 1960
drop lag*
label var gdpsh_mfg_ma2_un "GDP share mfg t UN-SNA MA2"
label var gdpsh_mfg_ma5_un "GDP share mfg t UN-SNA MA5"
label var gdpsh_fire_ma2_un "GDP share fire t UN-SNA MA2"
label var gdpsh_fire_ma5_un "GDP share fire t UN-SNA MA5"
label var gdpsh_ic_ma2_un "GDP share ic t UN-SNA MA2"
label var gdpsh_ic_ma5_un "GDP share ic t UN-SNA MA5"
label var mfg2020_ma2_un "GDP share mfg 2020 UN-SNA MA2"
label var mfg2020_ma5_un "GDP share mfg 2020 UN-SNA MA5"
label var fire2020_ma2_un "GDP share fire 2020 UN-SNA MA2"
label var fire2020_ma5_un "GDP share fire 2020 UN-SNA MA5"
label var ic2020_ma2_un "GDP share ic 2020 UN-SNA MA2"
label var ic2020_ma5_un "GDP share ic 2020 UN-SNA MA5"
sort country_wup year
save gdpsh_unsna, replace

******************
*** UN SNA-AMA ***
******************

* MFG 1970-2019 * 
clear
import excel "UN_SNA_AMA.xlsx", sheet("Download-GDPcurrent-NCU-countri") firstrow clear
tab Indicator
keep if Indicator == "Manufacturing (ISIC D)" 
keep Country y*
ren Country country
reshape long y, i(country) j(year)
ren y mfg
sort country year
save mfg, replace

* TOTAL 1970-2019 * 
clear
import excel "UN_SNA_AMA.xlsx", sheet("Download-GDPcurrent-NCU-countri") firstrow clear
tab Indicator
keep if Indicator == "Total Value Added" 
keep Country y*
ren Country country
reshape long y, i(country) j(year)
ren y total
sort country year
save total, replace

* SERVICES 1970-2019 * 
clear
import excel "UN_SNA_AMA.xlsx", sheet("Download-GDPcurrent-NCU-countri") firstrow clear
tab Indicator
keep if Indicator == "Wholesale, retail trade, restaurants and hotels (ISIC G-H)" | Indicator == "Transport, storage and communication (ISIC I)" | Indicator == "Other Activities (ISIC J-P)" 
tab Indicator
collapse (sum) y*, by(Country)
keep Country y*
ren Country country
reshape long y, i(country) j(year)
ren y service
sort country year
save service, replace

* WE MERGE * 
use total, clear
sort country year
merge country year using mfg
tab _m
drop _m
sort country year
merge country year using service
tab _m
drop _m
gen gdpsh_mfg_snaama = mfg/total*100
gen gdpsh_serv_snaama = service/total*100
sum gdpsh_*, d
tab country year if gdpsh_mfg != . & gdpsh_mfg < 0
* ok, we ignore
drop total mfg service
sort country year
save snaama, replace

* We use two-year moving average.
use snaama, clear
sort country year
bysort country: gen lag1gdpsh_mfg_snaama = gdpsh_mfg_snaama[_n-1]
bysort country: gen lag2gdpsh_mfg_snaama = gdpsh_mfg_snaama[_n-2]
bysort country: gen lead1gdpsh_mfg_snaama = gdpsh_mfg_snaama[_n+1]
bysort country: gen lead2gdpsh_mfg_snaama = gdpsh_mfg_snaama[_n+2]
* We use ma2
egen gdpsh_mfg_snaama_ma2 = rmean(*gdpsh_mfg_snaama)
egen gdpsh_serv_snaama_ma2 = rmean(*gdpsh_serv_snaama)
keep country year *_ma2
replace year = 2020 if year == 2019
gen lastyr = substr(string(year),4,1)
tab lastyr
keep if lastyr == "0" | lastyr == "5"
drop last 
gen country_gjv = country
replace country_gjv = "Cape Verde" if country_gjv == "Cabo Verde"
replace country_gjv = "Cote d'Ivoire" if country_gjv == "Côte d'Ivoire"
replace country_gjv = "Democratic Republic of the Congo" if country_gjv == "D.R. of the Congo"
replace country_gjv = "Lao People's Democratic Republic" if country_gjv == "Lao People's DR"
replace country_gjv = "Swaziland" if country_gjv == "Eswatini"
*replace country_gjv = "" if country_gjv == "Taiwan"
replace country_gjv = "United Republic of Tanzania" if country_gjv == "U.R. of Tanzania: Mainland"
* Former Sudan *
* We reconstruct using Sudan and South Sudan and their populations in 2015 and 2020.
replace gdpsh_mfg_snaama_ma2 = 5.91204775501644 if country == "Sudan (Former)" & year == 2015
replace gdpsh_mfg_snaama_ma2 = 7.16858278276562 if country == "Sudan (Former)" & year == 2020
replace gdpsh_serv_snaama_ma2 = 51.5597411086387 if country == "Sudan (Former)" & year == 2015
replace gdpsh_serv_snaama_ma2 = 54.7828511775321 if country == "Sudan (Former)" & year == 2020
drop if country_gjv == "Sudan"
replace country_gjv = "Sudan" if country_gjv == "Sudan (Former)"
* Yemen *
* We reconstruct using their populations ca 1990 for the weights.
replace gdpsh_mfg_snaama_ma2 = 8.872867871 if country == "Yemen" & year == 1970
replace gdpsh_mfg_snaama_ma2 = 7.871325642 if country == "Yemen" & year == 1975
replace gdpsh_mfg_snaama_ma2 = 7.803905903 if country == "Yemen" & year == 1980
replace gdpsh_mfg_snaama_ma2 = 10.49121534 if country == "Yemen" & year == 1985
replace gdpsh_serv_snaama_ma2 = 38.94002802 if country == "Yemen" & year == 1970
replace gdpsh_serv_snaama_ma2 = 47.615581 if country == "Yemen" & year == 1975
replace gdpsh_serv_snaama_ma2 = 52.22905764 if country == "Yemen" & year == 1980
replace gdpsh_serv_snaama_ma2 = 55.81330101 if country == "Yemen" & year == 1985
sort country_gjv year
save snaama2, replace

* WE MERGE WITH THE LIST *
use "list.dta", clear
keep if year >= 1970
sort country_gjv year
merge country_gjv year using snaama2
tab _m
tab country_gjv if _m == 1
tab country_gjv if _m == 2
drop if _m == 1
drop if _m == 2
drop _m
codebook *_ma2
gen countryyear = country+" "+string(year)
tab countryyear if gdpsh_mfg_snaama_ma2 == .
keep country_gjv year gdpsh_*_snaama_ma2
sort country_gjv year 
save gdpsh_unsnaama, replace






